1   package org.apache.lucene.index;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import java.io.IOException;
21  import java.util.Collections;
22  import java.util.IdentityHashMap;
23  import java.util.Iterator;
24  import java.util.Map;
25  import java.util.Set;
26  import java.util.SortedMap;
27  import java.util.TreeMap;
28  
29  import org.apache.lucene.util.Bits;
30  
31  /** An {@link LeafReader} which reads multiple, parallel indexes.  Each index
32   * added must have the same number of documents, but typically each contains
33   * different fields. Deletions are taken from the first reader.
34   * Each document contains the union of the fields of all documents
35   * with the same document number.  When searching, matches for a
36   * query term are from the first index added that has the field.
37   *
38   * <p>This is useful, e.g., with collections that have large fields which
39   * change rarely and small fields that change more frequently.  The smaller
40   * fields may be re-indexed in a new index and both indexes may be searched
41   * together.
42   * 
43   * <p><strong>Warning:</strong> It is up to you to make sure all indexes
44   * are created and modified the same way. For example, if you add
45   * documents to one index, you need to add the same documents in the
46   * same order to the other indexes. <em>Failure to do so will result in
47   * undefined behavior</em>.
48   */
49  public class ParallelLeafReader extends LeafReader {
50    private final FieldInfos fieldInfos;
51    private final ParallelFields fields = new ParallelFields();
52    private final LeafReader[] parallelReaders, storedFieldsReaders;
53    private final Set<LeafReader> completeReaderSet =
54      Collections.newSetFromMap(new IdentityHashMap<LeafReader,Boolean>());
55    private final boolean closeSubReaders;
56    private final int maxDoc, numDocs;
57    private final boolean hasDeletions;
58    private final SortedMap<String,LeafReader> fieldToReader = new TreeMap<>();
59    private final SortedMap<String,LeafReader> tvFieldToReader = new TreeMap<>();
60    
61    /** Create a ParallelLeafReader based on the provided
62     *  readers; auto-closes the given readers on {@link #close()}. */
63    public ParallelLeafReader(LeafReader... readers) throws IOException {
64      this(true, readers);
65    }
66  
67    /** Create a ParallelLeafReader based on the provided
68     *  readers. */
69    public ParallelLeafReader(boolean closeSubReaders, LeafReader... readers) throws IOException {
70      this(closeSubReaders, readers, readers);
71    }
72  
73    /** Expert: create a ParallelLeafReader based on the provided
74     *  readers and storedFieldReaders; when a document is
75     *  loaded, only storedFieldsReaders will be used. */
76    public ParallelLeafReader(boolean closeSubReaders, LeafReader[] readers, LeafReader[] storedFieldsReaders) throws IOException {
77      this.closeSubReaders = closeSubReaders;
78      if (readers.length == 0 && storedFieldsReaders.length > 0)
79        throw new IllegalArgumentException("There must be at least one main reader if storedFieldsReaders are used.");
80      this.parallelReaders = readers.clone();
81      this.storedFieldsReaders = storedFieldsReaders.clone();
82      if (parallelReaders.length > 0) {
83        final LeafReader first = parallelReaders[0];
84        this.maxDoc = first.maxDoc();
85        this.numDocs = first.numDocs();
86        this.hasDeletions = first.hasDeletions();
87      } else {
88        this.maxDoc = this.numDocs = 0;
89        this.hasDeletions = false;
90      }
91      Collections.addAll(completeReaderSet, this.parallelReaders);
92      Collections.addAll(completeReaderSet, this.storedFieldsReaders);
93      
94      // check compatibility:
95      for(LeafReader reader : completeReaderSet) {
96        if (reader.maxDoc() != maxDoc) {
97          throw new IllegalArgumentException("All readers must have same maxDoc: "+maxDoc+"!="+reader.maxDoc());
98        }
99      }
100     
101     // TODO: make this read-only in a cleaner way?
102     FieldInfos.Builder builder = new FieldInfos.Builder();
103     // build FieldInfos and fieldToReader map:
104     for (final LeafReader reader : this.parallelReaders) {
105       final FieldInfos readerFieldInfos = reader.getFieldInfos();
106       for (FieldInfo fieldInfo : readerFieldInfos) {
107         // NOTE: first reader having a given field "wins":
108         if (!fieldToReader.containsKey(fieldInfo.name)) {
109           builder.add(fieldInfo);
110           fieldToReader.put(fieldInfo.name, reader);
111           if (fieldInfo.hasVectors()) {
112             tvFieldToReader.put(fieldInfo.name, reader);
113           }
114         }
115       }
116     }
117     fieldInfos = builder.finish();
118     
119     // build Fields instance
120     for (final LeafReader reader : this.parallelReaders) {
121       final Fields readerFields = reader.fields();
122       for (String field : readerFields) {
123         // only add if the reader responsible for that field name is the current:
124         if (fieldToReader.get(field) == reader) {
125           this.fields.addField(field, readerFields.terms(field));
126         }
127       }
128     }
129 
130     // do this finally so any Exceptions occurred before don't affect refcounts:
131     for (LeafReader reader : completeReaderSet) {
132       if (!closeSubReaders) {
133         reader.incRef();
134       }
135       reader.registerParentReader(this);
136     }
137   }
138 
139   @Override
140   public String toString() {
141     final StringBuilder buffer = new StringBuilder("ParallelLeafReader(");
142     for (final Iterator<LeafReader> iter = completeReaderSet.iterator(); iter.hasNext();) {
143       buffer.append(iter.next());
144       if (iter.hasNext()) buffer.append(", ");
145     }
146     return buffer.append(')').toString();
147   }
148 
149   @Override
150   public void addCoreClosedListener(CoreClosedListener listener) {
151     addCoreClosedListenerAsReaderClosedListener(this, listener);
152   }
153 
154   @Override
155   public void removeCoreClosedListener(CoreClosedListener listener) {
156     removeCoreClosedListenerAsReaderClosedListener(this, listener);
157   }
158 
159   // Single instance of this, per ParallelReader instance
160   private final class ParallelFields extends Fields {
161     final Map<String,Terms> fields = new TreeMap<>();
162     
163     ParallelFields() {
164     }
165     
166     void addField(String fieldName, Terms terms) {
167       fields.put(fieldName, terms);
168     }
169     
170     @Override
171     public Iterator<String> iterator() {
172       return Collections.unmodifiableSet(fields.keySet()).iterator();
173     }
174     
175     @Override
176     public Terms terms(String field) {
177       return fields.get(field);
178     }
179     
180     @Override
181     public int size() {
182       return fields.size();
183     }
184   }
185   
186   /**
187    * {@inheritDoc}
188    * <p>
189    * NOTE: the returned field numbers will likely not
190    * correspond to the actual field numbers in the underlying
191    * readers, and codec metadata ({@link FieldInfo#getAttribute(String)}
192    * will be unavailable.
193    */
194   @Override
195   public FieldInfos getFieldInfos() {
196     return fieldInfos;
197   }
198   
199   @Override
200   public Bits getLiveDocs() {
201     ensureOpen();
202     return hasDeletions ? parallelReaders[0].getLiveDocs() : null;
203   }
204   
205   @Override
206   public Fields fields() {
207     ensureOpen();
208     return fields;
209   }
210   
211   @Override
212   public int numDocs() {
213     // Don't call ensureOpen() here (it could affect performance)
214     return numDocs;
215   }
216   
217   @Override
218   public int maxDoc() {
219     // Don't call ensureOpen() here (it could affect performance)
220     return maxDoc;
221   }
222   
223   @Override
224   public void document(int docID, StoredFieldVisitor visitor) throws IOException {
225     ensureOpen();
226     for (final LeafReader reader: storedFieldsReaders) {
227       reader.document(docID, visitor);
228     }
229   }
230   
231   @Override
232   public Fields getTermVectors(int docID) throws IOException {
233     ensureOpen();
234     ParallelFields fields = null;
235     for (Map.Entry<String,LeafReader> ent : tvFieldToReader.entrySet()) {
236       String fieldName = ent.getKey();
237       Terms vector = ent.getValue().getTermVector(docID, fieldName);
238       if (vector != null) {
239         if (fields == null) {
240           fields = new ParallelFields();
241         }
242         fields.addField(fieldName, vector);
243       }
244     }
245     
246     return fields;
247   }
248   
249   @Override
250   protected synchronized void doClose() throws IOException {
251     IOException ioe = null;
252     for (LeafReader reader : completeReaderSet) {
253       try {
254         if (closeSubReaders) {
255           reader.close();
256         } else {
257           reader.decRef();
258         }
259       } catch (IOException e) {
260         if (ioe == null) ioe = e;
261       }
262     }
263     // throw the first exception
264     if (ioe != null) throw ioe;
265   }
266 
267   @Override
268   public NumericDocValues getNumericDocValues(String field) throws IOException {
269     ensureOpen();
270     LeafReader reader = fieldToReader.get(field);
271     return reader == null ? null : reader.getNumericDocValues(field);
272   }
273   
274   @Override
275   public BinaryDocValues getBinaryDocValues(String field) throws IOException {
276     ensureOpen();
277     LeafReader reader = fieldToReader.get(field);
278     return reader == null ? null : reader.getBinaryDocValues(field);
279   }
280   
281   @Override
282   public SortedDocValues getSortedDocValues(String field) throws IOException {
283     ensureOpen();
284     LeafReader reader = fieldToReader.get(field);
285     return reader == null ? null : reader.getSortedDocValues(field);
286   }
287   
288   @Override
289   public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
290     ensureOpen();
291     LeafReader reader = fieldToReader.get(field);
292     return reader == null ? null : reader.getSortedNumericDocValues(field);
293   }
294 
295   @Override
296   public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
297     ensureOpen();
298     LeafReader reader = fieldToReader.get(field);
299     return reader == null ? null : reader.getSortedSetDocValues(field);
300   }
301 
302   @Override
303   public Bits getDocsWithField(String field) throws IOException {
304     ensureOpen();
305     LeafReader reader = fieldToReader.get(field);
306     return reader == null ? null : reader.getDocsWithField(field);
307   }
308 
309   @Override
310   public NumericDocValues getNormValues(String field) throws IOException {
311     ensureOpen();
312     LeafReader reader = fieldToReader.get(field);
313     NumericDocValues values = reader == null ? null : reader.getNormValues(field);
314     return values;
315   }
316 
317   @Override
318   public void checkIntegrity() throws IOException {
319     ensureOpen();
320     for (LeafReader reader : completeReaderSet) {
321       reader.checkIntegrity();
322     }
323   }
324 
325   /** Returns the {@link LeafReader}s that were passed on init. */
326   public LeafReader[] getParallelReaders() {
327     ensureOpen();
328     return parallelReaders;
329   }
330 }